#!/usr/bin/perl
#
# splithtml {-c cfg_file} {-l lvl_id} {-f fname_spec} {-o output_dir}
#           {-v} {-noclean} input_f
#
# Based on a given level identifier (1,2,3...), matching what is
# found in the TOC data file (default.dat), this sub-program splits
# up the full HTML file into separate HTML files. It also handles the
# updating of any and all references (including those in the TOC
# data files).
#
# requires perl5

# pull in the default configuration file; all local referenced
# config files override whatever is in here
#
# The file is looked for next to the script itself whenever the script was
# started through a path (whatever the script file is called); otherwise
# 'require' searches @INC for it.
#

$| = 1;    # autoflush the currently selected output handle

if ($0 =~ m{/}) {

    # swap the script's own filename for the config filename
    (my $cfg_path = $0) =~ s{[^/]*$}{splithtml.cfg};

    # 'require' searches @INC for a relative name unless it starts with
    # "./" or "../"; anchor it so the file beside the script is the one loaded
    $cfg_path = "./$cfg_path" unless $cfg_path =~ m{^(?:\.\.?)?/};

    require $cfg_path;
}
else {
    require 'splithtml.cfg';
}

# run-time settings; the cmd-line options read below override these
#
$_fname_spec = '';    # -f: sprintf() pattern used to name the output files
$_lvl_id     = -1;    # -l: TOC level to break on; -1 means "not given"
$_bVerbose   = 0;     # -v
$_bClean     = 1;     # cleared by -noclean
$_bHelpExist = 0;     # set once a help topics file has been written
$_indxExist  = '';    # filename of the chunk holding the book index, if any
$_output_dir = '';    # -o
$_input_file = '';    # last cmd-line argument

$_input_dir  = '';    # directory part of $_input_file
$_ttl        = '';    # book title, taken from the TOC data file
$_curr_title = '';    # title substituted for %%CURR_TITLE in headers/footers

%_element_map = ();   # TOC id => "filename#id"

@_break_elements = ();    # TOC ids that start a new output file
@_break_titles   = ();    # titles of those same TOC entries


# read in cmd-line arguments; parsing stops at the first argument that is
# not a known option
#
my %takes_value = map { $_ => 1 } ('-c', '-l', '-f', '-o');

while (@ARGV) {

    my $opt = $ARGV[0];

    if ($opt eq '-v') {
        $_bVerbose = 1;
        shift(@ARGV);
    }
    elsif ($opt eq '-noclean') {
        $_bClean = 0;
        shift(@ARGV);
    }
    elsif ($takes_value{$opt}) {

        # an option at the very end of the line has no value to consume
        if (@ARGV < 2) {
            usage();
            print "ERROR: option $opt requires a value\n";
            exit(-1);
        }

        shift(@ARGV);
        my $value = shift(@ARGV);

        if ($opt eq '-c') {
            # a config file that exists relative to the current directory is
            # loaded from there; anything else is left to the @INC search
            my $cfg = $value;
            $cfg = "./$cfg" if $cfg !~ m{^(?:\.\.?)?/} && -e $cfg;
            require $cfg;
        }
        elsif ($opt eq '-l') {
            $_lvl_id = $value + 0;
        }
        elsif ($opt eq '-f') {
            $_fname_spec = $value;
        }
        else {
            $_output_dir = $value;
        }
    }
    else {
        last;
    }
}

# the input file is whatever comes last on the command line
$_input_file = @ARGV ? $ARGV[-1] : '';


# see if any problems with arguments, as specified; also set up defaults
#
if ($_input_file eq '') {
    usage();
    exit(0);
}

# the TOC data files are read from the directory holding the input file
{
    my $slash_at = rindex($_input_file, '/');
    if ($slash_at == -1) {
        $_input_dir = '.';
    }
    elsif ($slash_at == 0) {
        $_input_dir = '/';    # file sits directly in the root directory
    }
    else {
        $_input_dir = substr($_input_file, 0, $slash_at);
    }
}

$_output_dir = $_input_dir if $_output_dir eq '';

if (!-w $_output_dir) {
    usage();
    print "ERROR: Cannot write to $_output_dir\n";
    exit(-1);
}

$_fname_spec = $DEF_FNAME_SPEC if $_fname_spec eq '';
$_fname_spec =~ s/^\///;    # output names are always relative to $_output_dir

$_lvl_id = $DEF_CHUNK_ID if $_lvl_id < 0;
$_lvl_id = 1 if $_lvl_id <= 0 || $_lvl_id > 5;

if ($_bVerbose == 1) {
    print "\n\nsplithtml: will read from $_input_dir\n\n";
    print "splithtml: processing $_input_dir/default.dat\n\n";
}


# read in table of contents data file, configure what the filename(s) will be
#
if (readToc("$_input_dir/default.dat") == 0) {
    usage();
    print "ERROR: Cannot read from $_input_dir/default.dat\n";
    exit(-1);
}
if ($_bVerbose == 1) {
    print "splithtml: ", scalar(@_break_elements), " breaking elements found\n\n";
    print "splithtml: processing $_input_file\n\n";
}


# read the master html file and break it up, fixing href's along the way
#
if (createContentFiles($_input_file) == 0) {
    usage();
    print "ERROR: Cannot read from $_input_file\n";
    exit(-1);
}


# build the toc structures/files
#
if ($_bVerbose == 1) {
    print "splithtml: processing TOC file(s)\n\n";
}

createHelpTopics();

if (createTocFiles() == 0) {
    usage();
    print "ERROR: Cannot create TOC file(s)\n";
    exit(-1);
}


# clean up: remove the input file and every *.dat file beside it.  The files
# are removed directly rather than through a shell command, so unusual
# characters in the paths cannot change what gets deleted.
#
if ($_bClean == 1) {
    if ($_bVerbose == 1) {
        print "splithtml: cleaning up...\n\n";
    }

    my @stale = ($_input_file);
    if (opendir(my $dir_h, $_input_dir)) {
        # same set a shell "*.dat" would match: hidden files are left alone
        push(@stale,
             map  { "$_input_dir/$_" }
             grep { !/^\./ && /\.dat$/ } readdir($dir_h));
        closedir($dir_h);
    }

    foreach my $path (@stale) {
        next unless -e $path || -l $path;    # already gone is not an error
        unlink($path) or print "ERROR: ($!) removing '$path'\n";
    }
}


exit(0);


#
# void usage()
#
# Prints the command synopsis and a description of every option to the
# currently selected output handle.
#
sub usage {

    my @help = (
        "usage: splithtml {-c cfg_file} {-l lvl_id} {-f fname_spec}\n",
        " {-o output_dir} {-v} {-noclean} in_html_file\n\n",

        "-c cfg_file : is the configuration file to use. See the default file:\n",
        " /usr/lib/Insight/dweb/dtl2html/splithtml.cfg for info.\n\n",

        "-l lvl_id : is the level to break/chunk on (based on TOC)\n",
        " default used is \"1\"\n\n",

        "-f fname_spec : is the filename convention to use, must have a\n",
        " \"%d\" within the string, as in \"sgi%05d.html\"\n",
        " which serves as the default is not provided.\n\n",

        "-o output_dir : location to write the output files to, default is the\n",
        " same directory the in_html_file is from.\n\n",

        "-v : operate in verbose mode.\n\n",

        "-noclean : do not remove the old fullbook.htm{l} file and it's\n",
        " associated TOC data files. They are removed by default.\n\n",

        "in_html_file : REQUIRED. Name of the fullbook.htm{l} file to \n",
        " process. Also denotes location of the TOC data\n",
        " files. Files created will be written to the same\n",
        " area, unless otherwise designated\n\n",
    );

    print "\n\n" . join('', @help);
}

#
# int readToc(string $tocfile)
#
# Reads the TOC data file, one "level|title|id" record per line, and fills in
# the globals the rest of the program works from:
#
#   $_ttl            title of the first level-0 record
#   @_break_elements ids of the records whose level is <= $_lvl_id
#   @_break_titles   titles of those same records
#   %_element_map    id => "filename#id" for every record after the title
#   $_indxExist      filename of the chunk holding the "Index" record, if any
#
# Filenames are produced by sprintf($_fname_spec, n), with n counting up
# from 1 for each break record.
#
# Returns 1 on success, 0 when the file cannot be opened.
#
sub readToc {

    my ($toc_path) = @_;

    my $chunk_no   = 1;
    my $curr_fname = '';

    open(my $toc_fh, '<', $toc_path) or return 0;

    while (my $line = <$toc_fh>) {

        # chomp, not chop: the last line may lack a newline and must not
        # lose the final character of its id
        chomp($line);
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        # a blank line is not a record; treating it as one would add an
        # empty id to the break list and use up a filename
        next if $line eq '';

        my @field = split(/\|/, $line);
        my $level = $field[0];
        my $title = defined $field[1] ? $field[1] : '';
        my $id    = defined $field[2] ? $field[2] : '';

        $title =~ s/^\s+//;
        $title =~ s/\s+$//;

        # this is the title line
        #
        if ($level eq '0' && $_ttl eq '') {
            $_ttl = $title;
            next;
        }

        # break at these levels; configure filename to use
        #
        if (($level + 0) <= $_lvl_id) {

            # store only those that we are breaking on
            #
            push(@_break_elements, $id);
            push(@_break_titles,   $title);

            $curr_fname = sprintf($_fname_spec, $chunk_no++);
        }

        # use as quick lookup to determine what each id is mapped to;
        # ids may or may not already carry their leading '#'
        #
        $_element_map{$id} = $curr_fname . ($id =~ /^#/ ? '' : '#') . $id;

        # back of the book index
        #
        $_indxExist = $curr_fname if $title =~ /^Index$/i;
    }

    close($toc_fh);
    return 1;
}

#
# int createContentFiles(string $mainHtml)
#
# Splits the master HTML file into one output file per break element.
#
# A line starting with "<!-- SGITOC:" and carrying "|SGI_<n>" marks the start
# of a section; when that id is one of @_break_elements a new output file is
# started (its name comes from %_element_map, placed in $_output_dir).  Each
# output file is wrapped in $STD_HDR / $STD_FTR via std_hdrftr().  Input
# before the first break marker is dropped; reading stops at the line
# starting with "<!-- SGIEND:".  Every quoted TOC id found in a copied line
# is replaced by its quoted "filename#id" mapping.
#
# Returns 1 on success, 0 when the input or an output file cannot be opened.
#
sub createContentFiles {

    my ($html_path) = @_;

    my $curr_ptr = -1;    # index into @_break_elements of the file being written
    my $doc_fh;           # handle of the file being written, undef before the first

    # One pattern matching any known id in double quotes, built once instead
    # of running one substitution per id on every line.  Ids are escaped so
    # that characters such as '.' only match themselves, and the empty id is
    # left out so that "" in the HTML is never rewritten.
    my @ids = grep { $_ ne '' } keys %_element_map;
    my $ref_re;
    if (@ids) {
        my $alternatives = join('|', map { quotemeta($_) } @ids);
        $ref_re = qr/"($alternatives)"/;
    }

    open(my $in_fh, '<', $html_path) or return 0;

    while (my $line = <$in_fh>) {

        last if $line =~ /^<!-- SGIEND:/;

        # found a marker, start of a section
        #
        if ($line =~ /^<!-- SGITOC:/ && $line =~ /\|(SGI_\d+)/) {

            my $id = $1;

            # see if we need to break and create a new file
            #
            my $break_at = -1;
            foreach my $idx (0 .. $#_break_elements) {
                if (   $_break_elements[$idx] eq $id
                    || $_break_elements[$idx] eq "#$id")
                {
                    $break_at = $idx;
                    last;
                }
            }

            if ($break_at != -1) {

                _closeContentFile($doc_fh, $curr_ptr) if defined $doc_fh;
                undef $doc_fh;

                $curr_ptr = $break_at;
                my $curr_fname = $_output_dir . '/'
                               . $_element_map{ $_break_elements[$break_at] };
                $curr_fname =~ s/\#\w+$//;    # keep the filename, drop "#id"

                if ($_bVerbose == 1) {
                    print "splithtml: creating $curr_fname\n\n";
                }

                if (!open($doc_fh, '>', $curr_fname)) {
                    close($in_fh);
                    return 0;
                }
                $_curr_title = $_break_titles[$curr_ptr];
                print {$doc_fh} std_hdrftr($curr_ptr, $STD_HDR);
            }
        }

        # adjust all element references
        #
        if (defined $doc_fh) {
            $line =~ s/$ref_re/"$_element_map{$1}"/g if defined $ref_re;
            print {$doc_fh} $line;
        }
    }
    close($in_fh);

    # finish the last file, whether the end marker was seen or the input
    # simply ran out
    _closeContentFile($doc_fh, $curr_ptr) if defined $doc_fh;

    return 1;
}

# Writes the standard footer for break element $ptr to $doc_fh and closes it.
sub _closeContentFile {

    my ($doc_fh, $ptr) = @_;

    $_curr_title = $_break_titles[$ptr];
    print {$doc_fh} std_hdrftr($ptr, $STD_FTR);
    close($doc_fh);
}

#
# int createTocFiles()
#
# Turns each readable TOC data file in $_input_dir (default, figures,
# tables, examples) into an HTML page in $_output_dir: "default" becomes
# index.html, the others <name>.html.  For "default" a second, fully
# expanded page toc_full.html is written as well; index.html itself only
# lists entries whose level is below $DEF_EXP_LEVEL.
#
# The page header comes from $STD_TOC_HDR with its %%L... placeholders
# replaced by links to the help page, the index and the other TOC pages
# (placeholders with nothing to link to are removed); the footer comes from
# $STD_TOC_FTR.  An entry is listed only when its id is known to
# %_element_map and the file it points to is readable.
#
# Returns 1 on success, 0 when any file cannot be opened.
#
# NOTE(review): the ' ' filler and indent strings below are single spaces as
# found in this copy of the source; confirm they were not HTML entities.
#
sub createTocFiles {

    my @toc_names = grep { -r "$_input_dir/$_.dat" }
                    ('default', 'figures', 'tables', 'examples');

    foreach my $toc (@toc_names) {

        my $is_main = ($toc eq 'default');

        open(my $in_fh, '<', "$_input_dir/$toc.dat") or return 0;

        my $out_path = "$_output_dir/" . ($is_main ? 'index' : $toc) . '.html';
        my $out_fh;
        if (!open($out_fh, '>', $out_path)) {
            close($in_fh);
            return 0;
        }
        if ($_bVerbose == 1) {
            print "splithtml: creating $out_path\n\n";
        }

        # setup an expanded toc file
        #
        my $exp_fh;
        if ($is_main && !open($exp_fh, '>', "$_output_dir/toc_full.html")) {
            close($in_fh);
            close($out_fh);
            return 0;
        }

        # --- header -------------------------------------------------------
        $_curr_title = $TOC_TITLES{$toc};
        my $hdr = std_hdrftr(1, $STD_TOC_HDR);

        my $help_link = $_bHelpExist == 1
            ? "<A HREF=\"help.html\">" . $TOC_TITLES{'help'} . "</A>"
            : ' ';
        $hdr =~ s/%%LHELP/$help_link/g;

        my $indx_link = $_indxExist ne ''
            ? "<A HREF=\"$_indxExist\">" . $TOC_TITLES{'indx'} . "</A>"
            : ' ';
        $hdr =~ s/%%LINDX/$indx_link/g;

        # link to every other TOC page; the current one is plain text
        foreach my $other (@toc_names) {
            my $label = $TOC_TITLES{$other};
            if ($other ne $toc) {
                $label = "<A HREF=\"" . ($other eq 'default' ? 'index' : $other)
                       . ".html\">" . $TOC_TITLES{$other} . "</A>";
            }
            my $tag = '%%L' . uc($other);
            $hdr =~ s/$tag/$label/g;
        }

        if ($is_main) {

            $hdr =~ s/%%LBORDER/1/;
            my $exp_hdr = $hdr;

            # collapsed page: link to the expanded one
            my $expand_link = "<A HREF=\"toc_full.html\">" . $TOC_TITLES{'expand'} . "</A>";
            $hdr =~ s/%%LEXPAND/$expand_link/;
            $hdr =~ s/%%LCOLLAPSE/$TOC_TITLES{'collapse'}/;

            # expanded page: link back to the collapsed one
            my $collapse_link = "<A HREF=\"index.html\">" . $TOC_TITLES{'collapse'} . "</A>";
            $exp_hdr =~ s/%%LCOLLAPSE/$collapse_link/;
            $exp_hdr =~ s/%%LEXPAND/$TOC_TITLES{'expand'}/;

            $exp_hdr =~ s/%%L\w+//g;
            print {$exp_fh} $exp_hdr;
        }
        else {
            $hdr =~ s/%%LBORDER/0/;
        }

        $hdr =~ s/%%L\w+//g;    # drop placeholders nothing was available for
        print {$out_fh} $hdr;

        # --- entries ------------------------------------------------------
        while (my $rec = <$in_fh>) {

            # same clean-up readToc applies, so that the ids looked up here
            # are spelled the way they were stored in %_element_map
            chomp($rec);
            $rec =~ s/^\s+//;
            $rec =~ s/\s+$//;
            next if $rec eq '';

            my @field = split(/\|/, $rec);
            my $level = $field[0];
            my $title = defined $field[1] ? $field[1] : '';
            my $id    = defined $field[2] ? $field[2] : '';

            next if $level eq '0';

            # see if file actually exists, if not, do not print this item
            #
            my $target = $_element_map{$id};
            next unless defined $target && $target ne '';

            (my $file = $target) =~ s/\#\w+$//;
            next unless -r "$_output_dir/$file";

            my $depth = $level + 0;
            my $entry = (' ' x $depth)
                      . "<A HREF=\"" . $target . "\">" . $title . "</A><BR>\n";

            if ($is_main) {
                if ($depth == 1) {
                    print {$out_fh} "\n<BR>\n";
                    print {$exp_fh} "\n<BR>\n";
                }
                print {$out_fh} $entry if $depth < $DEF_EXP_LEVEL;
                print {$exp_fh} $entry;
            }
            else {
                print {$out_fh} $entry;
            }
        }
        close($in_fh);

        # --- footer -------------------------------------------------------
        my $ftr = std_hdrftr(1, $STD_TOC_FTR);
        print {$out_fh} $ftr;
        close($out_fh);

        if ($is_main) {
            print {$exp_fh} $ftr;
            close($exp_fh);
        }
    }

    return 1;
}

#
# pick up any helpid's and create a simple structure
#
# void createHelpTopics()
#
# Scans the content files in $_output_dir (those whose names fit
# $_fname_spec) for lines containing "SGI_HELPID:" and writes help.html: a
# bullet list linking to every topic, followed by a comment block listing
# "id|file#id|title" per topic, wrapped in $STD_HDR / $STD_FTR.
#
# Sets $_bHelpExist to 1 when at least one topic was found; otherwise
# help.html is removed again.  Returns nothing.
#
# The files are read directly instead of through an external grep: grep
# leaves out the filename prefix when only one file matches, which left the
# links without a target file, and no path is handed to a shell.
#
sub createHelpTopics {

    # "sgi%05d.html" -> "sgi*.html"
    (my $file_glob = $_fname_spec) =~ s/\%\d*d/*/g;

    my $help_path = "$_output_dir/help.html";

    # bsd_glob, unlike glob, does not split its argument on whitespace
    require File::Glob;
    my @content_files = File::Glob::bsd_glob("$_output_dir/$file_glob");

    my @topics;    # each entry: [ filename, id, title ]
    foreach my $path (@content_files) {

        next if $path eq $help_path;    # never scan the file being (re)written
        open(my $in_fh, '<', $path) or next;

        my ($fname) = $path =~ m{([^/]+)$};

        while (my $line = <$in_fh>) {

            next if index($line, 'SGI_HELPID:') < 0;
            chomp($line);

            my ($id, $title) = ('', '');

            if ($line =~ /\ NAME=\"([\ \!#-~]+)\"/) {
                $id = $1;
            }
            if ($line =~ /\/A>([\ -~]+)/) {
                $title = $1;
                $title =~ s/<[\w]{1,4}>//g;           # short opening tags
                $title =~ s/<[\/]{1}[\w]{1,4}>//g;    # short closing tags
            }

            push(@topics, [ $fname, $id, $title ]);
        }
        close($in_fh);
    }

    open(my $out_fh, '>', $help_path) or return;

    if ($_bVerbose == 1) {
        print "splithtml: creating help topics file ($help_path)\n\n";
    }

    my $page = $STD_HDR;
    $page =~ s/%%BK_TITLE/$_ttl/g;
    $page =~ s/%%CURR_TITLE/$TOC_TITLES{'help'}/g;
    $page =~ s/%%PREV_URL/ /g;
    $page =~ s/%%NEXT_URL/ /g;
    $page .= "\n\n<UL>\n";
    print {$out_fh} $page;

    my $ht_set = '';
    foreach my $topic (@topics) {
        my ($fname, $id, $title) = @$topic;

        print {$out_fh} "<LI><A href=\"$fname#$id\">$title</A></LI>\n";

        $ht_set .= "<!-- " . "$id|$fname#$id|$title" . " -->\n";
        $found_one = 1;    # global set by the original as well; kept as is
    }

    $page = "\n</UL>\n\n<!-- START SGI_HELPTOPICS -->\n" . $ht_set .
            "<!-- END SGI_HELPTOPICS -->\n\n";
    $page .= $STD_FTR;
    $page =~ s/%%BK_TITLE/$_ttl/g;
    $page =~ s/%%CURR_TITLE/$TOC_TITLES{'help'}/g;
    $page =~ s/%%PREV_URL/ /g;
    $page =~ s/%%NEXT_URL/ /g;
    print {$out_fh} $page;

    close($out_fh);

    # no help topics found
    #
    if ($ht_set eq '') {
        unlink($help_path);
    }
    else {
        $_bHelpExist = 1;
    }

    return;
}

#
# header and footer methods
#
# string std_hdrftr(int $id, string $template)
#
# Returns a copy of $template with its placeholders filled in:
#
#   %%BK_TITLE    the book title ($_ttl)
#   %%CURR_TITLE  the current title ($_curr_title)
#   %%PREV_URL    link to the break element before index $id
#   %%NEXT_URL    link to the break element after index $id
#
# $id is an index into @_break_elements.  When there is no previous or next
# element the placeholder is replaced by a single space instead of a link.
#
# NOTE(review): that filler is a single space as found in this copy of the
# source; confirm it was not an HTML entity originally.
#
sub std_hdrftr {

    my ($s, $in_buf) = @_;
    my $id = $s + 0;

    my $buf = $in_buf;
    $buf =~ s/%%BK_TITLE/$_ttl/g;
    $buf =~ s/%%CURR_TITLE/$_curr_title/g;

    my $prev_link = _navLink($id - 1);
    $buf =~ s/%%PREV_URL/$prev_link/g;

    my $next_link = _navLink($id + 1);
    $buf =~ s/%%NEXT_URL/$next_link/g;

    return "$buf";
}

# Returns the "<A HREF=...>title</A>" link for break element $idx, or a
# single space when $idx lies outside @_break_elements.
sub _navLink {

    my ($idx) = @_;

    # out of range on either side: nothing to link to
    return ' ' if $idx < 0 || $idx > $#_break_elements;

    my $file = $_element_map{ $_break_elements[$idx] };
    $file = '' unless defined $file;
    $file =~ s/\#[\w]+$//;    # link to the file itself, not the anchor

    return "<A HREF=\"$file\">" . $_break_titles[$idx] . "</A>";
}
